vignettes/example-gallery-08-interactive-charts.Rmd
example-gallery-08-interactive-charts.RmdThis document is adapted from the Bar Charts section of the Altair Example Gallery.
Our first step is to set up our environment:
# devtools::install_github("vegawidget/altair")
library("altair")
library("tibble")
library("dplyr")##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
brush <- alt$selection(type = "interval", resolve="global")
base <-
alt$Chart(r_to_py(vega_data$cars()))$
mark_point()$
encode(
y="Miles_per_Gallon",
color=alt$condition(brush, "Origin", alt$ColorValue("gray"))
)$
properties(selection = brush, width = 250, height=250)
chart <- (base$encode(x = "Horsepower") | base$encode( x = "Acceleration"))
chart## Observations: 1,461
## Variables: 6
## $ date <dttm> 2012-01-01, 2012-01-02, 2012-01-03, 2012-01-04,...
## $ precipitation <dbl> 0.0, 10.9, 0.8, 20.3, 1.3, 2.5, 0.0, 0.0, 4.3, 1...
## $ temp_max <dbl> 12.8, 10.6, 11.7, 12.2, 8.9, 4.4, 7.2, 10.0, 9.4...
## $ temp_min <dbl> 5.0, 2.8, 7.2, 5.6, 2.8, 2.2, 2.8, 2.8, 5.0, 0.6...
## $ wind <dbl> 4.7, 4.5, 2.3, 4.7, 6.1, 2.2, 2.3, 2.0, 3.4, 3.4...
## $ weather <chr> "drizzle", "rain", "rain", "rain", "rain", "rain...
weather <- r_to_py(weather)
brush <- alt$selection(type = "interval", encodings = list("x"))
bars <-
alt$Chart()$
mark_bar()$
encode(
x = alt$X("date:O", timeUnit="month"),
y = "mean(precipitation):Q",
opacity = alt$condition(brush, alt$OpacityValue(1), alt$OpacityValue(0.7))
)$
properties(selection = brush)
line <-
alt$Chart()$
mark_rule(color="firebrick")$
encode(
y = "mean(precipitation):Q",
size = alt$SizeValue(3)
)$
transform_filter(brush$ref())
chart <- alt$layer(bars, line, data=weather)
chart## Observations: 3,201
## Variables: 16
## $ Creative_Type <list> [NULL, NULL, NULL, NULL, "Contemporary...
## $ Director <list> [NULL, NULL, NULL, NULL, NULL, NULL, "...
## $ Distributor <list> ["Gramercy", "Strand", "Lionsgate", "F...
## $ IMDB_Rating <dbl> 6.1, 6.9, 6.8, NaN, 3.4, NaN, 7.7, 3.8,...
## $ IMDB_Votes <dbl> 1071, 207, 865, NaN, 165, NaN, 15133, 3...
## $ MPAA_Rating <list> ["R", "R", NULL, NULL, "R", NULL, "R",...
## $ Major_Genre <list> [NULL, "Drama", "Comedy", "Comedy", "D...
## $ Production_Budget <dbl> 8000000, 300000, 250000, 300000, 100000...
## $ Release_Date <list> ["12-Jun-98", "7-Aug-98", "28-Aug-98",...
## $ Rotten_Tomatoes_Rating <dbl> NaN, NaN, NaN, 13, 62, NaN, NaN, NaN, 2...
## $ Running_Time_min <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
## $ Source <list> [NULL, NULL, NULL, NULL, "Original Scr...
## $ Title <list> ["The Land Girls", "First Love, Last R...
## $ US_DVD_Sales <dbl> NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,...
## $ US_Gross <dbl> 146083, 10876, 203134, 373615, 1009819,...
## $ Worldwide_Gross <dbl> 146083, 10876, 203134, 373615, 1087521,...
pts <- alt$selection(type = "single", encodings = list("x"))
rect <-
alt$Chart(r_to_py(movies))$
mark_rect()$
encode(
x = alt$X("IMDB_Rating:Q", bin = TRUE),
y = alt$Y("Rotten_Tomatoes_Rating:Q", bin=TRUE),
color = alt$Color(
"count(IMDB_Rating)",
scale = alt$Scale(scheme = "greenblue"),
legend = alt$Legend(title = "Total Records")
)
)
circ <-
rect$
mark_point()$
encode(
color = alt$ColorValue("grey"),
size = alt$Size(
"count(IMDB_Rating)",
legend = alt$Legend(title = "Records in Selection")
)
)$
transform_filter(pts$ref())
bar <-
alt$Chart(r_to_py(movies))$
mark_bar()$
encode(
x = "Major_Genre:N",
y = "count(Major_Genre)",
color = alt$condition(pts, alt$ColorValue("steelblue"), alt$ColorValue("grey"))
)$
properties(selection = pts, width = 550, height = 200)
chart <-
((rect + circ) & bar)$
resolve_legend(color = "independent", size = "independent")
chartNote: alt$repeat() must be translated to alt$\x60repeat\x60().
## Observations: 2,000
## Variables: 5
## $ date <dttm> 2001-01-14 21:55:00, 2001-03-26 20:15:00, 2001-03...
## $ delay <dbl> 0, -11, -3, 12, 2, 47, 3, -4, 4, 0, 18, -7, -10, 2...
## $ destination <chr> "SMF", "SLC", "LAX", "SNA", "LAX", "AUS", "PHX", "...
## $ distance <dbl> 480, 507, 714, 342, 373, 189, 872, 723, 318, 487, ...
## $ origin <chr> "SAN", "PHX", "ELP", "SJC", "SMF", "DAL", "AUS", "...
brush <- alt$selection_interval(encodings = list("x"))
# Define the base chart, with the common parts of the
# background and highlights
base <-
alt$Chart()$
mark_bar()$
encode(
x = alt$X(
alt$`repeat`("column"),
type = "quantitative",
bin = alt$Bin(maxbins=20)
),
y = "count(delay)"
)$
properties(width = 180, height = 130)
# blue background with selection
background <- base$properties(selection = brush)
# yellow highlights on the transformed data
highlight <-
base$
encode(
color = alt$value("goldenrod")
)$
transform_filter(brush$ref())
# layer the two charts & repeat
chart <-
(background + highlight)$
properties(data = r_to_py(flights))$
transform_calculate("time", "hours(datum.date)")$
`repeat`(column = list("distance", "delay", "time"))
chart## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
This multi-line chart uses an invisible Voronoi tessellation to handle mouseover to identify the nearest point and then highlight the line on which the point falls. It is adapted from amitkamp’s block.
## Observations: 560
## Variables: 3
## $ symbol <chr> "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT", "MSFT",...
## $ date <dttm> 2000-01-01, 2000-02-01, 2000-03-01, 2000-04-01, 2000-0...
## $ price <dbl> 39.81, 36.35, 43.22, 28.37, 25.45, 32.54, 28.40, 28.40,...
highlight <-
alt$selection_single(
on = "mouseover",
fields = list("symbol"),
nearest = TRUE
)
base <-
alt$Chart(r_to_py(vega_data$stocks()))$
encode(
x = "date:T",
y = "price:Q",
color = "symbol:N"
)
points <-
base$mark_circle()$
encode(
opacity = alt$value(0)
)$
properties(selection = highlight, width = 600)
lines <-
base$
mark_line()$
encode(
size = alt$condition(highlight, alt$value(3), alt$value(1))
)
chart <- (points + lines)
chartThis example usex selections and layers to create a multi-line tooltip that tracks the x position of the cursor.
To find the x-position of the cursor, little trick is used: adding some transparent points with only an x encoding (no y encoding) and tie a nearest selection to these, tied to the x field.
Definition
## Observations: 300
## Variables: 3
## $ category <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A"...
## $ x <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
## $ y <dbl> 1.37, 0.81, 1.17, 1.80, 2.20, 2.09, 3.60, 3.51, 5.53,...
# Create a selection that chooses the nearest point & selects based on x-value
nearest <- alt$selection(
type = "single",
nearest = TRUE,
on = "mouseover",
fields = list("x"),
empty = "none"
)
# The basic line
line <-
alt$Chart()$
mark_line(interpolate = "basis")$
encode(
x = "x:Q",
y = "y:Q",
color = "category:N"
)
# Transparent selectors across the chart. This is what tells us
# the x-value of the cursor
selectors <-
alt$Chart()$
mark_point()$
encode(
x = "x:Q",
opacity = alt$value(0)
)$
properties(selection = nearest)$
copy()
# Draw points on the line, and highlight based on selection
points <-
line$
mark_point()$
encode(
opacity = alt$condition(nearest, alt$value(1), alt$value(0))
)
# Draw text labels near the points, and highlight based on selection
text <-
line$
mark_text(align = "left", dx = 5, dy = -5)$
encode(
text = alt$condition(nearest, "y:Q", alt$value(" "))
)
# Draw a rule at the location of the selection
rules <-
alt$Chart()$
mark_rule(color = "gray")$
encode(
x = "x:Q"
)$
transform_filter(nearest$ref())
# Put the five layers into a chart and bind the data
chart <-
(line + selectors + points + rules + text)$
properties(data = r_to_py(data), width = 600, height = 300)
chart## Observations: 1,461
## Variables: 6
## $ date <dttm> 2012-01-01, 2012-01-02, 2012-01-03, 2012-01-04,...
## $ precipitation <dbl> 0.0, 10.9, 0.8, 20.3, 1.3, 2.5, 0.0, 0.0, 4.3, 1...
## $ temp_max <dbl> 12.8, 10.6, 11.7, 12.2, 8.9, 4.4, 7.2, 10.0, 9.4...
## $ temp_min <dbl> 5.0, 2.8, 7.2, 5.6, 2.8, 2.2, 2.8, 2.8, 5.0, 0.6...
## $ wind <dbl> 4.7, 4.5, 2.3, 4.7, 6.1, 2.2, 2.3, 2.0, 3.4, 3.4...
## $ weather <chr> "drizzle", "rain", "rain", "rain", "rain", "rain...
scale <- alt$Scale(
domain = list("sun", "fog", "drizzle", "rain", "snow"),
range = list("#e7ba52", "#a7a7a7", "#aec7e8", "#1f77b4", "#9467bd")
)
color <- alt$Color("weather:N", scale = scale)
# We create two selections:
# - a brush that is active on the top panel
# - a multi-click that is active on the bottom panel
brush <- alt$selection_interval(encodings = list("x"))
click <- alt$selection_multi(encodings = list("color"))
# Top panel is scatter plot of temperature vs time
points <-
alt$Chart()$
mark_point()$
encode(
x = alt$X(
"date:T",
timeUnit = "monthdate",
axis=alt$Axis(title="Date")
),
alt$Y(
"temp_max:Q",
axis = alt$Axis(title = "Maximum Daily Temperature (C)"),
scale = alt$Scale(domain = list(-5, 40))
),
color = alt$condition(brush, color, alt$value("lightgray")),
size = alt$Size("precipitation:Q", scale = alt$Scale(range = list(5, 200)))
)$
properties(width = 600, height = 300, selection = brush)$
transform_filter(click$ref())
# Bottom panel is a bar chart of weather type
bars <-
alt$Chart()$
mark_bar()$
encode(
x = "count(weather)",
y = "weather:N",
color = alt$condition(click, color, alt$value("lightgray"))
)$
transform_filter(brush$ref())$
properties(width = 600, selection = click)
chart <-
(points & bars)$
properties(
data = r_to_py(vega_data$seattle_weather()),
title = "Seattle Weather: 2012-2015"
)
chartThis example shows a selection that links two views of data: the left panel contains one point per object, and the right panel contains one line per object. Clicking on either the points or lines will select the corresponding objects in both views of the data.
The challenge lies in expressing such hierarchical data in a way that Altair can handle. We do this by merging the data into a “long form” dataframe, and aggregating identical metadata for the final plot.
Definition
n_objects <- 20
n_times <- 50
# Create one (x, y) pair of metadata per object
locations <-
tibble(
id = seq(1, n_objects),
x = rnorm(n_objects),
y = rnorm(n_objects)
)
# Create a 50-element time-series for each object
timeseries <-
crossing(
id = seq(1, n_objects),
time = seq(1, n_times)
) %>%
mutate(value = rnorm(n())) %>%
group_by(id) %>%
mutate(value = cumsum(value)) %>%
ungroup()
# Merge the (x, y) metadata into the long-form view
data <- left_join(timeseries, locations, by = "id")## Observations: 1,000
## Variables: 5
## $ id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ time <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1...
## $ value <dbl> -0.02142707, 0.64907101, 0.21445397, -0.89942582, -0.292...
## $ x <dbl> -0.004620768, -0.004620768, -0.004620768, -0.004620768, ...
## $ y <dbl> -0.4845954, -0.4845954, -0.4845954, -0.4845954, -0.48459...
selector <- alt$selection_single(empty = "all", fields = list("id"))
points <-
alt$Chart(r_to_py(data))$
mark_point(filled = TRUE, size = 200)$
encode(
x = "mean(x)",
y = "mean(y)",
color = alt$condition(
selector,
"id:O",
alt$value("lightgray"),
legend = NULL
)
)$
properties(selection = selector, width = 250, height = 250)$
interactive()$
copy()
timeseries <-
alt$Chart(r_to_py(data))$
mark_line()$
encode(
x = "time",
y = alt$Y("value", scale = alt$Scale(domain = c(-15, 15))),
color = alt$Color("id:O", legend = NULL)
)$
transform_filter(selector)$
properties(selection = selector, width = 250, height = 250)
points | timeseriesThis chart shows an example of using an interval selection to filter the contents of an attached histogram, allowing the user to see the proportion of items in each category within the selection.
TODO: find a way to fix the numerical scale of the bar chart
## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
cars <- r_to_py(vega_data$cars())
brush <- alt$selection(type="interval")
points <-
alt$Chart()$
mark_point()$
encode(
x = "Horsepower:Q",
y = "Miles_per_Gallon:Q",
color = alt$condition(brush, "Origin:N", alt$value("lightgray"))
)$
properties(selection = brush)
bars <-
alt$Chart()$
mark_bar()$
encode(
x = "count(Origin):Q",
y = "Origin:N",
color = "Origin:N"
)$
transform_filter(brush$ref())
chart <- alt$vconcat(points, bars, data = cars)
chart## Observations: 406
## Variables: 9
## $ Acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5,...
## $ Cylinders <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, ...
## $ Displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, ...
## $ Horsepower <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, ...
## $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, ...
## $ Name <chr> "chevrolet chevelle malibu", "buick skylark 3...
## $ Origin <chr> "USA", "USA", "USA", "USA", "USA", "USA", "US...
## $ Weight_in_lbs <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 431...
## $ Year <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-...
This chart visualizes the age distribution of the US population over time. It uses a slider widget that is bound to the year to visualize the age distribution over time.
## Observations: 570
## Variables: 4
## $ age <dbl> 0, 0, 5, 5, 10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35,...
## $ people <dbl> 1483789, 1450376, 1411067, 1359668, 1260099, 1216114, 1...
## $ sex <dbl> 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1...
## $ year <dbl> 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1...
pop <- r_to_py(vega_data$population())
pink_blue <-
alt$Scale(
domain = list("Male", "Female"),
range = list("steelblue", "salmon")
)
slider <- alt$binding_range(min = 1900, max = 2000, step = 10)
year <- alt$selection_single(
name = "year",
fields = list("year"),
bind = slider
)
chart <-
alt$Chart(pop)$
mark_bar()$
encode(
x = alt$X("sex:N", axis = alt$Axis(title = NULL)),
y = alt$Y("people:Q", scale = alt$Scale(domain = c(0, 1.2e7))),
color = alt$Color("sex:N", scale = pink_blue),
column = "age:O"
)$
properties(width = 20, selection = year)$
transform_calculate("sex", JS('if(datum.sex == 1, "Male", "Female")'))$
transform_filter(year$ref())
chart